import transformers
from transformers import BertTokenizer, BertModel
import numpy as np

# 加载 BERT 模型的分词器和模型
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model = BertModel.from_pretrained('bert-base-chinese')

list=[]
# 将文本转化为 BERT 模型能够处理的输入格式
text = "我喜欢今天的天气，和你的笑容一样灿烂"
text2="迎着阳光盛大逃亡，我会带你走出这里，我保证"
input_ids = tokenizer.encode(text,max_length=512,truncation=True,return_tensors='pt')
output = model(input_ids)[0]
print(output.shape)
print('---------------------')
output=output[:,0,:].detach().cpu().numpy()
print(output.shape)

input_ids2 = tokenizer.encode(text2,max_length=512,truncation=True,return_tensors='pt')
output2 = model(input_ids2)[0]
print('---------------------')
output2=output2[:,0,:].detach().cpu().numpy()
vec = np.concatenate((output, output2), axis=0)
vec=vec.reshape(-1)
print(vec.shape)

list.append(vec)
print(len(list))
train=np.array(list)
print(train)